packages <- c(
"CIMseq", "CIMseq.data", "printr", "ggthemes", "tidyverse"
)
purrr::walk(packages, library, character.only = TRUE)
rm(packages)
##DATA
load('../data/CIMseqData.rda')
load('../data/sObj.rda')
#rename classes
renameClasses <- function(class) {
case_when(
class == "0" ~ "C.Lgr5.proximal.1",
class == "1" ~ "C.Lgr5.proximal.2",
class == "2" ~ "C.Colonocytes.early.proximal",
class == "3" ~ "SI.Stem",
class == "4" ~ "C.Lgr5.distal",
class == "5" ~ "C.Goblet.distal.1",
class == "6" ~ "C.Colonocytes.late.proximal",
class == "7" ~ "SI.Lgr5.Mki67",
class == "8" ~ "C.TA.proximal",
class == "9" ~ "C.TA.distal",
class == "10" ~ "SI.TA.1",
class == "11" ~ "C.Colonocytes.distal",
class == "12" ~ "C.Lgr5.Mki67.proximal.2",
class == "13" ~ "C.Goblet.distal.2",
class == "14" ~ "C.Goblet.proximal.1",
class == "15" ~ "C.Goblet.proximal.2",
class == "16" ~ "C.Lgr5.Mki67.proximal.1",
class == "17" ~ "SI.Goblet",
class == "18" ~ "SI.Enterocytes.early",
class == "19" ~ "C.Goblet.distal.3",
class == "20" ~ "C.Goblet.distal.4",
class == "21" ~ "Tufft",
class == "22" ~ "SI.TA.2",
class == "23" ~ "C.Goblet.distal.5",
class == "24" ~ "Enteroendocrine",
class == "25" ~ "SI.Enterocytes.late",
class == "26" ~ "C.Goblet.distal.6",
class == "27" ~ "SI.Paneth",
class == "28" ~ "Blood",
TRUE ~ "error"
)
}
c.order <- c(
"SI.Goblet", "SI.Paneth", "SI.Stem", "SI.Lgr5.Mki67", "SI.TA.1", "SI.TA.2", "SI.Enterocytes.early", "SI.Enterocytes.late",
"C.Goblet.proximal.1", "C.Goblet.proximal.2", "C.Lgr5.proximal.1", "C.Lgr5.proximal.2", "C.Lgr5.Mki67.proximal.1", "C.Lgr5.Mki67.proximal.2", "C.TA.proximal", "C.Colonocytes.early.proximal", "C.Colonocytes.late.proximal",
"C.Goblet.distal.1", "C.Goblet.distal.2", "C.Goblet.distal.3", "C.Goblet.distal.4", "C.Goblet.distal.5", "C.Goblet.distal.6", "C.Lgr5.distal", "C.TA.distal", "C.Colonocytes.distal",
"Tufft", "Enteroendocrine", "Blood"
)
getData(cObjSng, "classification") <- renameClasses(getData(cObjSng, "classification"))
fractions <- getData(sObj, "fractions")
colnames(fractions) <- renameClasses(colnames(fractions))
sObj@fractions <- fractions
plotUnsupervisedClass(cObjSng, cObjMul)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedMarkers(
cObjSng, cObjMul,
c("Lgr5", "Ptprc", "Chga", "Dclk1", "Slc26a3", "Atoh1", "Alpi", "Lyz1"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Mki67"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Plet1"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedMarkers(
cObjSng, cObjMul, c("Hoxb13"),
pal = RColorBrewer::brewer.pal(8, "Set1")
)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedClass(cObjSng, cObjMul) %>%
plotData() %>%
inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
ggplot() +
geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = unique_key)) +
ggthemes::theme_few() +
scale_colour_manual(values = col40())
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedClass(cObjSng, cObjMul) %>%
plotData() %>%
inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
mutate(subject_id = as.character(subject_id)) %>%
ggplot() +
geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = subject_id), alpha = 0.3) +
ggthemes::theme_few() +
scale_colour_manual(values = col40())
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotUnsupervisedClass(cObjSng, cObjMul) %>%
plotData() %>%
inner_join(MGA.Meta, by = c("Sample" = "sample")) %>%
mutate(subject_age = as.character(subject_age)) %>%
ggplot() +
geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = subject_age)) +
ggthemes::theme_few() +
scale_colour_manual(values = col40())
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
tibble(
sample = colnames(getData(cObjSng, "counts")),
class = getData(cObjSng, "classification")
) %>%
mutate(source = if_else(
str_detect(sample, "SRR654") | str_detect(sample, "SRR510"),
"External", "Enge"
)) %>%
group_by(source, class) %>%
summarize(n = n()) %>%
ungroup() %>%
group_by(source) %>%
mutate(`%` = n / sum(n) * 100) %>%
ggplot() +
geom_bar(aes(source, `%`, fill = source), stat = "identity", position = position_dodge(width = 1)) +
facet_wrap(~class, scales = "free") +
labs(x = "Source", y = "% of dataset") +
theme_bw() +
theme(
legend.position = "top",
axis.title.x = element_blank()
) +
guides(fill = FALSE)
getData(cObjSng, "dim.red") %>%
matrix_to_tibble("sample") %>%
setNames(c("sample", "UMAP.dim1", "UMAP.dim2")) %>%
mutate(source = case_when(
str_detect(sample, "SRR654") ~ "Tabula Muris",
str_detect(sample, "SRR510") ~ "Regev",
TRUE ~ "Enge"
)) %>%
sample_n(nrow(.), FALSE) %>%
ggplot() +
geom_point(aes(UMAP.dim1, UMAP.dim2, colour = source), alpha = 0.75) +
theme_few()
averageGroupExpression <- function(exp, classes) {
c <- unique(classes)
means <- purrr::map_dfc(c, function(x) {
data.frame(rowMeans(exp[, classes == x]))
})
means <- as.matrix(means)
colnames(means) <- c
return(means)
}
av.exp <- averageGroupExpression(
getData(cObjSng, "counts.cpm")[getData(cObjMul, "features"), ],
getData(cObjSng, "classification")
)
p.cor <- cor(av.exp, method = "pearson")
p.cor %>%
matrix_to_tibble("to") %>%
gather(from, cor, -to) %>%
mutate(
from = parse_factor(from, levels = c.order),
to = parse_factor(to, levels = c.order)
) %>%
ggplot() +
geom_tile(aes(from, to, fill = cor)) +
scale_fill_viridis_c() +
theme_few() +
theme(
axis.text.x = element_text(angle = 90),
axis.title = element_blank()
) +
guides(fill = guide_colorbar(title = "Pearson's\ncorrelation"))
adj <- adjustFractions(cObjSng, cObjMul, sObj)
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
table(apply(adj, 1, sum)) / nrow(adj) * 100
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
|---|---|---|---|---|---|---|---|---|
| 24.49198 | 41.39037 | 22.99465 | 7.807487 | 2.032086 | 0.855615 | 0.2139037 | 0.1069519 | 0.1069519 |
fractions <- getData(sObj, "fractions")
tibble(fractions = c(fractions)) %>%
ggplot() +
geom_histogram(aes(fractions), binwidth = 0.01) +
theme_bw()
tibble(
nCellTypes = apply(adj, 1, sum),
cost = getData(sObj, "costs")
) %>%
ggplot() +
geom_boxplot(aes(nCellTypes, cost, group = nCellTypes)) +
scale_x_continuous(name = "Detected cell types", breaks = 0:max(apply(adj, 1, sum))) +
theme_bw()
tibble(
sample = names(getData(sObj, "costs")),
cost = unname(getData(sObj, "costs"))
) %>%
inner_join(
select(estimateCells(cObjSng, cObjMul), sample, estimatedCellNumber),
by = "sample"
) %>%
mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
ggplot() +
geom_boxplot(aes(estimatedCellNumber, cost, group = estimatedCellNumber)) +
scale_x_continuous(
name = "ERCC estimated cell number"
#breaks = 0:max(round(pull(estimateCells(cObjSng, cObjMul), estimatedCellNumber)))
) +
theme_bw()
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
ercc <- filter(estimateCells(cObjSng, cObjMul), sampleType == "Multiplet")
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
nConnections <- apply(adj, 1, sum)
nConnections %>%
tibble(
sample = names(.),
detectedConnections = .
) %>%
inner_join(ercc) %>%
mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
ggplot() +
geom_boxplot(aes(estimatedCellNumber, detectedConnections, group = estimatedCellNumber)) +
scale_x_continuous(
name = "ERCC estimated cell number",
breaks = 0:max(round(ercc$estimatedCellNumber))
) +
scale_y_continuous(
name = "Detected cell number",
breaks = 0:max(round(nConnections))
) +
theme_bw()
## Joining, by = "sample"
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(tibble(
sample = colnames(getData(cObjMul, "counts")),
total.counts = colSums(getData(cObjMul, "counts"))
), by = "sample") %>%
ggplot() +
geom_boxplot(aes(detectedConnections, total.counts, group = detectedConnections)) +
scale_x_continuous(
name = "Detected cell number",
breaks = 0:max(nConnections)
) +
scale_y_continuous(name = "Total counts") +
theme_bw()
tibble(
sample = names(nConnections),
detectedConnections = nConnections
) %>%
inner_join(tibble(
sample = colnames(getData(cObjMul, "counts")),
total.ercc = colSums(getData(cObjMul, "counts.ercc"))
), by = "sample") %>%
ggplot() +
geom_boxplot(aes(detectedConnections, total.ercc, group = detectedConnections)) +
scale_x_continuous(
name = "Detected cell number",
breaks = 0:max(nConnections)
) +
scale_y_continuous(name = "Total ERCC counts") +
theme_bw()
plotSwarmCircos(sObj, cObjSng.enge, cObjMul, classOrder = c.order)
Only detected duplicates and triplicates.
Only ERCC estimated cell number <= 4.
Weight cutoff = 5.
adj <- adjustFractions(cObjSng.enge, cObjMul, sObj, binary = TRUE)
samples <- rownames(adj)
rs <- rowSums(adj)
keep1 <- rs == 2 | rs == 3
keep2 <- samples %in% filter(estimateCells(cObjSng, cObjMul), estimatedCellNumber <= 4)$sample
## Warning in .checkEstimateCellsInput(counts.ercc): Results will not be
## accurate. These samples ERCC reads are all 0's: s.SRR5103562, s.SRR5102813,
## s.SRR5102362, s.SRR5102747, s.SRR5103063, ...
plotSwarmCircos(
filterSwarm(sObj, keep1 & keep2), cObjSng.enge, cObjMul, weightCut = 5,
classOrder = c.order)